---
title: "Cleaning LatinoBarometro"
---

# Load

```{r}
# attach the packages this document relies on (defined in the shared helper script)
  source("helper-packages.R")

# read the raw LatinoBarometro 1998 Stata file, declaring its text encoding as UTF-8
  latinobarometro_1998_raw <- 
    "../raw-data/y-multi-latinobarometro/Latinobarometro_1998/Latinobarometro_1998.dta" %>% 
    read_dta(encoding = "UTF-8")
```

# Clean LatinoBarometro 1998

```{r}
# country-level fieldwork windows, used where individual interview dates are missing (54 cases);
# each window was imputed from the observed dates for that country, e.g.
# range(latinobarometro_1998_clean$resp_interview_date[latinobarometro_1998_clean$resp_country_common == "Spain"], na.rm = TRUE)
  dates <- 
    tibble(
      # join key: must match the values produced for resp_country_common below
      resp_country_common = 
        c("Argentina", "Bolivia", "Chile", "Spain"),
      # first day of the fieldwork window, one entry per country above
      resp_interview_start_date = 
        as.Date(c("1998-11-01", "1998-11-20", "1998-11-02", "1998-12-01")),
      # last day of the fieldwork window, one entry per country above
      resp_interview_end_date = 
        as.Date(c("1998-11-30", "1998-11-29", "1998-11-29", "1998-12-14")))

# clean: derive the harmonised `resp_*` variables from the raw 1998 file, attach the
# country-level fieldwork windows from `dates`, and keep only the `resp_*` columns
  latinobarometro_1998_clean <- 
    latinobarometro_1998_raw %>% 
    mutate(
      
    #########################  
    ####### META-DATA #######  
    #########################      
      
      # source name (character vector, title case)
      # NOTE(review): spelled "LatinoBarometero" here but "LatinoBarometro" in the document title;
      # left unchanged because downstream code may match on this exact string -- confirm before renaming
        resp_source = "LatinoBarometero",
        
      # round number (character vector, title case)  
        resp_round = "1998",
      
      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/3a5jW24",

      # survey mode (in-person/phone/internet)
        resp_survey_mode = "in-person",    

      # country (character vector; list of countries as written in original source)
        resp_country_original = to_character(idenpa),

      # country (character vector; converts to countrycode country.name list)
        resp_country_common = countryname(resp_country_original),
        
      # interview date (variable of class Date; if only month given, input 1st of month)
      # numinves, mesreal and diareal are passed as the year, month and day components
        resp_interview_date = make_date(numinves, mesreal, diareal)) %>% 
      # add resp_interview_start_date / resp_interview_end_date for the countries listed in `dates`;
      # countries absent from `dates` get NA in both columns
      left_join(
        dates, by = "resp_country_common") %>% 
      mutate(
   
    #########################  
    ##### DEMOGRAPHICS ######  
    #########################
      
      # respondent's religion (character vector that corresponds to master list)
      # codes 13-15 and any code not listed are set to NA
        resp_religion = 
          dplyr::recode(
            as.character(sp80),
            "1" = "Christian",
            "2" = "Christian",
            "3" = "Christian",
            "4" = "Christian",
            "5" = "Christian",
            "6" = "Christian",
            "7" = "Christian",
            "8" = "Christian",
            "9" = "Jewish",
            "10" = "Christian",
            "11" = "Other religion",
            "12" = "Other religion",
            "13" = NA_character_,
            "14" = NA_character_,
            "15" = NA_character_,
            "16" = "Other religion",
            .default = NA_character_),

      # respondent's denomination (character vector that corresponds to master list)
        resp_denomination = to_character(sp80),
    
      # respondent's age (code -2 set to NA)
      # NOTE(review): the result is a character vector, not numeric -- confirm this is the
      # type expected when the cleaned files are combined
        resp_age =
          dplyr::recode(
            as.character(s2),
            "-2" = NA_character_),    
    
      # respondent's education level (original category, with the harmonised level in brackets)
      # NOTE(review): "Complete Secondary" and "Incomplete high" are both bracketed as [Primary];
      # verify against the project's master education list
        resp_education_original = 
          dplyr::recode(
            as.character(s14a),
            "1" = "1. Illiterate [No education]",
            "2" = "2. Incomplete primary [No education]",
            "3" = "3. Complete primary [Primary]",
            "4" = "4. Incomplete Secondary, technical [Primary]",
            "5" = "5. Complete Secondary, technical [Primary]",
            "6" = "6. Incomplete high [Primary]",
            "7" = "7. Complete high [College]",
            .default = NA_character_),
      
      # respondent's gender (numeric: female = 1; male = 0; other = NA)
        resp_female = 
          case_when(
            s1 == 2 ~ 1,
            s1 == 1 ~ 0),
    
      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0)
      # tamciud code 1 is treated as rural, codes 2-8 as urban, anything else as NA
        resp_rural =
          case_when(
            tamciud %in% c(1) ~ 1,
            tamciud %in% c(2:8) ~ 0,
            TRUE ~ NA_real_),  
      
    #########################  
    ### SOCIAL DISTANCE 1 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_1_qinfo = "NUM: NP63F; QTEXT: In this list you will see various groups of people. Could you select any that you would not like to have as neighbours? Muslims.; ROPTIONS: 1 = Mentioned [=1] + 0 = Not mentioned [=0]; TARGET: Muslim; TYPE: Distance, neighbor",
      
      # original response (as character vector)
        resp_soc_dist_1_original = as.character(np63f),   

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
      # recoded explicitly so the result is a plain numeric vector rather than the raw imported
      # column, and so any code other than 0/1 becomes NA (same pattern as the other binary recodes)
        resp_soc_dist_1_bin_recode = 
          case_when(
            np63f %in% c(1) ~ 1,
            np63f %in% c(0) ~ 0,
            TRUE ~ NA_real_),

    #########################  
    ### SOCIAL DISTANCE 2 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_2_qinfo = "NUM: NP63G; QTEXT: In this list you will see various groups of people. Could you select any that you would not like to have as neighbours? Jews.; ROPTIONS: 1 = Mentioned [=1] + 0 = Not mentioned [=0]; TARGET: Jewish; TYPE: Distance, neighbor",
      
      # original response (as character vector)
        resp_soc_dist_2_original = as.character(np63g),   

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
      # recoded explicitly so the result is a plain numeric vector rather than the raw imported
      # column, and so any code other than 0/1 becomes NA (same pattern as the other binary recodes)
        resp_soc_dist_2_bin_recode = 
          case_when(
            np63g %in% c(1) ~ 1,
            np63g %in% c(0) ~ 0,
            TRUE ~ NA_real_),
    
    ############################  
    ### GENERAL SOCIAL TRUST ###  
    ############################
    
      # original question number; question text; response options (input above)
        resp_gentrust_qinfo = "NUM: sp20; QTEXT: Generally speaking, would you say that you can trust most people, or that you can never be too careful when dealing with others?; ROPTIONS: 1 = You can trust most people [=0] + 2 = You can never be too careful when dealing with others [=1]",
    
      # original response (as character vector; codes -4 to -1 set to NA)
        resp_gentrust_original = dplyr::recode(
            as.character(sp20),
            "-4" = NA_character_,
            "-3" = NA_character_,
            "-2" = NA_character_,
            "-1" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_gentrust_bin_recode = case_when(
            sp20 %in% c(1) ~ 0,
            sp20 %in% c(2) ~ 1,
            TRUE ~ NA_real_),  
    
    #########################  
    ##### RELIGIOSITY #######  
    #########################
    
      # original question number; question text; response options (input above)
        resp_religiosity_qinfo = "NUM: sp81; QTEXT: Do you practice your religion?; ROPTIONS: 1 = Very much practising + 2 = Practising + 3 = Not very much practising + 4 = Not practising",
  
      # original response (as numeric vector, with non-substantive responses coded as NA_real_)
        resp_religiosity_original = 
          dplyr::recode(
            as.numeric(sp81),
            "-4" = NA_real_,
            "-3" = NA_real_,
            "-2" = NA_real_,
            "-1" = NA_real_),      

      # recode (numeric: scaled 0-1, where 1 is more religious)
      # reverses the 1-4 scale: 1 (very much practising) -> 1, 4 (not practising) -> 0
        resp_religiosity_recode = (4 - resp_religiosity_original)/3

    ) %>% 
    # drop every raw survey column, keeping only the harmonised variables
    select(starts_with("resp_"))
```

# Save data

```{r}
  # write the cleaned 1998 data to the shared cleaned-data folder as an RDS file
  saveRDS(
    object = latinobarometro_1998_clean,
    file = "../cleaned-data/y-15-multi-latinobarometro.rds")
```